County Figures

theme_c <- function(...){ 
   # font <- "Helvetica"   #assign font family up front
    theme_bw() %+replace%    #replace elements we want to change
    
    theme(
      
      
      #text elements
      plot.title = element_text(             #title
                   size = 16,                #set font size
                   face = 'bold',            #bold typeface
                   hjust = .5,
                   vjust = 3),               
      
      plot.subtitle = element_text(          #subtitle
                   size = 12,
                   hjust = .5,
                   face = 'italic',
                   vjust = 3),               #font size
      
      axis.title = element_text(             #axis titles
                   size = 12),               #font size
      
      axis.text = element_text(              #axis text
                   size = 9),
      legend.text = element_text(size = 12),
      # t, r, b, l
      plot.margin = unit(c(1,.5,.5,.5), "cm"),
      legend.position = "right",
      strip.text.x = element_text(size = 11, face = "bold", color="white"),
      strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
      strip.background = element_rect(fill = "#3E3D3D")
      ) %+replace%
      theme(...)
   
}


pal <- c("red", "#10BAC5", "#1B10C5", "#EFB719", "#900C3F")

names(pal) <- c("Covidestim",
                '$P(S_1|untested)$ Centered at Survey Value',
                '$P(S_1|untested)$ and $\\beta$ Centered at Survey Values',
               "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
               "$\\beta$ Centered at Survey Value"
               )
targets_dir <- here("_targets")

county <- tar_read(ma_v1, 
                   store =targets_dir) %>% 
  mutate(version = "v1") %>%
  bind_rows(
    tar_read(ma_v2,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(ma_v3, 
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(ma_v4, 
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(ma_v5,
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(ma_v6,
             store =targets_dir)
   )%>%
   bind_rows(
    tar_read(ma_v7,
             store =targets_dir)
   )


waste <- tar_read(waste, store =targets_dir)
county_names <- read_tsv(here("data/demographic/county_fips.tsv")) %>%
  rename_with(.cols = everything(), .fn =tolower) %>%
  bind_rows()

covidestim <- tar_read(covidestim_biweekly_county,
                        store =targets_dir) %>%
  select(-c(date)) %>% 
  distinct()

dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))


if(filter_versions) {
  county <- county %>%
    filter(version %in% c("v3", "v5", "v6", "v7")) 
}
wjoined <- county %>%
  # set Nantucket, Duke fips to Nantucket since we have wastewater data
  # for Nantucket
  mutate(fips = ifelse(grepl("25019", fips), "25019", fips)) %>%
  inner_join(waste) %>%
  left_join(covidestim, relationship = "many-to-many") %>%
  left_join(dates,  relationship = "many-to-many") %>%
  group_by(biweek) %>%
  mutate(mindate = min(date),
         maxdate = max(date)) %>%
  ungroup()

counties_with_waste <- wjoined %>%
  group_by(fips) %>%
  mutate(obs_w_notna = sum(!is.na(mean_conc))) %>%
  filter(obs_w_notna != 0) %>%
  pull(fips) %>%
  unique()



versions <- tibble(
  version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
  vlabel = factor(
    c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "$\\beta$ Centered at Survey Value",
    "$P(S_1|untested)$ Centered at Survey Value",
    "$P(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$P(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ),
    levels = c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
        "$\\beta$ Centered at Survey Value",
         "$P(S_1|untested)$ Centered at Survey Value",
        "$P(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$P(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
       
    ) )
)

wjoined <- wjoined %>%
  left_join(versions)


if(filter_versions) {
  
  county <- county %>%
    left_join(versions) %>%
    mutate(vlabel=case_when(
      version == "v7" ~  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
      version == "v5" ~ "$\\beta$ Centered at Survey Value",
      version =="v6" ~  "$P(S_1|untested)$ and $\\beta$ Centered at Survey Values",
      TRUE ~ vlabel
    )) %>%
    mutate(vlabel =factor(vlabel,levels= c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
         "$P(S_1|untested)$ Centered at Survey Value",
         "$\\beta$ Centered at Survey Value",
        "$P(S_1|untested)$ and $\\beta$ Centered at Survey Values"))) %>%
    left_join(county_names) %>%
    mutate(name = ifelse(fips =="25007,25019", "Nantucket and Dukes", name)) %>%
    left_join(dates,  relationship = "many-to-many") %>%
    group_by(biweek) %>%
    mutate(mindate = min(date),
           maxdate = max(date)) %>%
    ungroup() 


}

Correlations

Normalization Strategy 1: Standardization

w_data <- wjoined %>% 
  select(-date) %>%
  distinct() %>%
  filter(fips %in% counties_with_waste) %>%
  filter(!is.na(mean_conc)) %>%
  group_by(fips, version) %>%
  mutate(
    mean_conc_std = (mean_conc - mean(mean_conc))/ 
           sd(mean_conc),
    exp_cases_median_std = (exp_cases_median -  mean(exp_cases_median))/
                                   sd(exp_cases_median),
         infections_std = (infections - mean(infections))/
      sd(infections),
         exp_cases_lb_std = (exp_cases_lb - mean(exp_cases_lb))/
      sd(exp_cases_lb),
        exp_cases_ub_std = (exp_cases_ub - mean(exp_cases_ub))/
      sd(exp_cases_ub),
         positive_std = (positive - mean(positive))
    /sd(positive))  %>%
  ungroup()

PB Counts and Wastewater

# correlations between PB counts and wastewater concentrations

correlations <- w_data %>%
  group_by(version) %>%
  summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
            vlabel =unique(vlabel)) %>%
  mutate(x=2.8,
         y=.1)

w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  
               label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Mean Wastewater Concentration\n(Standardized by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Mean Wastewater Concentration")

plt_wastewater_pb <- w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Mean Wastewater Concentration\n(Standardized by County)",
       title = "Correlations with Wastewater Concentrations")

PB Counts and Covidestim

# correlations between PB counts and Covidestim estimates


covidestim_correlations <- w_data %>%
  filter(!is.na(infections_std)) %>%
  group_by(vlabel) %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(exp_cases_median_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=2.8,
         y=.1)



w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Covidestim Estimates\n(Standardized by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")

plt_covidestim_pb <- w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Covidestim Estimates\n(Standardized by County)",
       title = "Correlations with Covidestim Infections")

Combined Figure

cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2,  labels="auto", label_size=19)

ggsave(here("figures/correlations-wastewater-by-version.pdf"))
cor_covid <- covidestim_correlations %>% 
  select(vlabel, 
         `Correlation with Covidestim Estimates`=Correlation)

cor_waste <- correlations %>% 
  select(vlabel, 
         `Correlation with Wastewater Concentrations`=Correlation)


cor_waste %>%
  left_join(cor_covid) %>%
  mutate(vlabel = gsub(
    "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "Priors Do Not Vary by State and Date",
    vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
  arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
  rename(Implementation = vlabel) %>%
  kbl() %>%
  kable_styling(full_width = TRUE)
Implementation Correlation with Wastewater Concentrations Correlation with Covidestim Estimates
\(\,\)CTIS Priors that Do Not Vary by State or Date\(\,\) 0.8976666 0.9861254
\(P(S_1|untested)\) Centered at Survey Value 0.8951465 0.9855135
\(\beta\) Centered at Survey Value (Spline Smoothed) 0.8921199 0.9825280
\(P(S_1|untested)\) and \(\beta\) Centered at Survey Values (\(\beta\) Spline Smoothed) 0.8856435 0.9791947

Covidestim and Wastewater

covidestim_correlations <- w_data %>%
  filter(version=="v7") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(mean_conc_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1) 



w_data %>%
  filter(version=="v7") %>%
  ggplot(aes(x=infections_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  theme_c() +
  labs(x = "Covidestim Estimates\n(Standardized by County)",
       y = "Wastewater Concentrations\n(Standardized by County)",
       title = "Correlations Between Wastewater Concentrations\nand Covidestim Estimates")

pb_correlations <- w_data %>%
  filter(version=="v7") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(positive_std,mean_conc_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1) 

Compare to Wastewater Over Time

library(scales)

pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")

names(pal) <- c(#"$observed$", 
                '$P(S_1|untested)$ Centered at Survey Value',
                '$P(S_1|untested)$ and $\\beta$ Centered at Survey Values',
               "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
               "$\\beta$ Centered at Survey Value"
               )



w_data <- w_data %>%
  group_by(name) %>%
  mutate(adjust = 1/(max(mean_conc))*(exp_cases_median[which.max(mean_conc)])) %>%
  ungroup()

waste_df <- w_data %>%
  select(biweek, mean_conc, name, adjust, fips) %>%
  distinct() %>%
  left_join(dates,relationship = "many-to-many" ) %>%
  group_by(name, biweek) %>%
  # center date within 2-week interval
  summarize(date = min(date) + days(7),
            mean_conc =unique(mean_conc),
            adjust=unique(adjust),
            fips = unique(fips)) %>%
  mutate(name = gsub("County, MA", "", name))


pltlist <- w_data %>%
  mutate(name = gsub("County, MA", "", name)) %>%
  left_join(dates,relationship = "many-to-many" ) %>%
  # filter(version %in% c("v7","v3")) %>%
  filter(version=="v7") %>%
  filter(biweek>=6) %>%
  group_by(biweek) %>%
  mutate(mindate = min(date),
         maxdate=max(date)) %>%
  ungroup() %>% 
  group_by(name) %>%
  group_split() %>%
  map(~ { 
    
    county_fips <- .x$fips %>% unique()
    adj <- .x$adjust %>% unique()
    
    .x %>% 
 #   filter(fips == county_fips  & date >= begin_date & date <= end_date) %>%
    ggplot() +
      geom_ribbon(aes(x = date, 
                 ymin = exp_cases_lb,
                 ymax = exp_cases_ub,
                 fill = vlabel),
                 alpha = .5) +
      geom_line(data = waste_df[which(waste_df$fips == county_fips),],
                aes(x = date, y =mean_conc*adj, 
                    color = 'Wastewater Effective Concentrations'),
               # color = "#DB4048",
                size = 1.1) +
      geom_linerange(aes(xmin = mindate, 
                         xmax = maxdate, 
                         y = infections,
                         color = 'Covidestim Infection Estimates')) + 
      geom_point(data =  waste_df[which(waste_df$fips == county_fips),],
                aes(x = date, y =mean_conc*adj ),
                color = "#DB4048",
                alpha = .5,
                size = 1.2) +
      scale_y_continuous(sec.axis=sec_axis(~.x/adj,
                                           name = 'Mean Wastewater Effective Concentration',
                                           labels=comma),
                         labels=comma
                    ) +
      # facet_grid(name~vlabel, 
      #            labeller=labeller(vlabel =as_labeller(
      #              TeX, default=label_parsed))) +
      facet_wrap(~name, ncol=3) + 
      scale_fill_manual(values = pal, labels = c('','','',''), 
                        name = "Probabilistic Bias Intervals") +
     theme_c(legend.position="none") +
      labs(y = "Value (Standardized by County)",
           title = "",
           x= "") +
      scale_x_date(date_labels = "%b %Y") +
    scale_color_manual(
      name = '', 
      values = c(
        'Covidestim Infection Estimates' = 'darkblue',
        'Wastewater Effective Concentrations' = '#DB4048')) +
    guides(color = guide_legend(override.aes = list(size = 12,
                                                    linewidth=3)),
           fill = guide_legend(override.aes =list(size = 6))) 
    })

legend <- cowplot::get_legend(
  pltlist[[1]] + theme(legend.position="top")
)

grid <- cowplot::plot_grid(plotlist=pltlist, ncol =3)

cowplot::plot_grid(legend, grid, nrow=2, rel_heights=c(.05,.95))

ggsave(here('figures/simulation-intervals-wastewater.pdf'))

Normalization Strategy 2: By Values at Biweek Where Wastewater Concentration is Maximized

w_data <- wjoined %>% 
  select(-date) %>%
  distinct() %>%
  filter(fips %in% counties_with_waste) %>%
  filter(!is.na(mean_conc)) %>%
  group_by(fips, version) %>%
  mutate(mean_conc_std = mean_conc/ max(mean_conc),
         max_conc = max(mean_conc),
         exp_cases_median_std = exp_cases_median / exp_cases_median[which.max(mean_conc)],
         exp_cases_lb_std = exp_cases_lb /exp_cases_median[which.max(mean_conc)],
         exp_cases_ub_std = exp_cases_ub / exp_cases_median[which.max(mean_conc)],
         infections_std  = infections/ infections[which.max(mean_conc)],
         positive_std = positive/max(positive)) %>%
  ungroup()

PB Counts and Wastewater

# correlations between PB counts and wastewater concentrations

correlations <- w_data %>%
  group_by(version) %>%
  summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
            vlabel =unique(vlabel)) %>%
  mutate(x=1.8,
         y=.1)

w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Mean Wastewater Concentration\n(Standardized by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Mean Wastewater Concentration")


plt_wastewater_pb <- w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County by County)",
       y = "Mean Wastewater Concentration\n(Standardized by County by County)",
       title = "")

PB Counts and Covidestim

# correlations between PB counts and Covidestim estimates


covidestim_correlations <- w_data %>%
  filter(!is.na(infections_std)) %>%
  group_by(vlabel) %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(exp_cases_median_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1)



w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County by County)",
       y = "Covidestim Estimates\n(Standardized by County by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")


plt_covidestim_pb <- w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County by County)",
       y = "Covidestim Estimates\n(Standardized by County by County)",
       title = "")

Combined Figure

cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)


cor_covid <- covidestim_correlations %>% 
  select(vlabel, 
         `Correlation with Covidestim Estimates`=Correlation)

cor_waste <- correlations %>% 
  select(vlabel, 
         `Correlation with Wastewater Concentrations`=Correlation)


cor_waste %>%
  left_join(cor_covid) %>%
  mutate(vlabel = gsub(
    "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "Priors Do Not Vary by County and Date",
    vlabel, fixed=TRUE),vlabel=gsub(" at","\nat", vlabel)) %>%
  arrange(desc( `Correlation with Wastewater Concentrations`)) %>%
  rename(Implementation = vlabel) %>%
  kbl() %>%
  kable_styling(full_width = TRUE)

Covidestim and Wastewater

covidestim_correlations <- w_data %>%
  filter(version=="v7") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(mean_conc_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1) 



w_data %>%
  filter(version=="v7") %>%
  ggplot(aes(x=infections_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  theme_c() +
  labs(x = "Covidestim Estimates\n(Standardized by County by County)",
       y = "Wastewater Concentrations\n(Standardized by County by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")




pb_correlations <- w_data %>%
  filter(version=="v7") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(positive_std,mean_conc_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1) 

Compare to Wastewater Over Time

library(scales)

pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")

names(pal) <- c(#"$observed$", 
                '$P(S_1|untested)$ Centered at Survey Value',
                '$P(S_1|untested)$ and $\\beta$ Centered at Survey Values',
               "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
               "$\\beta$ Centered at Survey Value"
               )


waste_df <- w_data %>%
  select(biweek, mean_conc_std, name) %>%
  distinct() %>%
  left_join(dates,relationship = "many-to-many" ) %>%
  group_by(name, biweek) %>%
  # center date within 2-week interval
  summarize(date = min(date) + days(7),
            mean_conc_std=unique(mean_conc_std)) %>%
  mutate(name = gsub("County, MA", "", name))


w_data%>%
  mutate(name = gsub("County, MA", "", name)) %>%
  left_join(dates,relationship = "many-to-many" ) %>%
  filter(version %in% c("v7","v3")) %>%
  filter(biweek>=6) %>%
  group_by(biweek) %>%
  mutate(mindate = min(date),
         maxdate=max(date)) %>%
  ungroup() %>% 
 #   filter(fips == county_fips  & date >= begin_date & date <= end_date) %>%
    ggplot() +
      geom_ribbon(aes(x = date, 
                 ymin = exp_cases_lb_std,
                 ymax = exp_cases_ub_std,
                 fill = vlabel),
                 alpha = .5) +
      geom_line(data = waste_df,
                aes(x = date, y =mean_conc_std, 
                    color = 'Wastewater Effective Concentrations'),
               # color = "#DB4048",
                size = 1.1) +
      geom_linerange(aes(xmin = mindate, 
                         xmax = maxdate, 
                         y = infections_std,
                         color = 'Covidestim Infection Estimates')) + 
      geom_point(data = waste_df,
                aes(x = date, y =mean_conc_std ),
                color = "#DB4048",
                alpha = .5,
                size = 1.2) +
      facet_grid(name~vlabel, 
                 labeller=labeller(vlabel =as_labeller(
                   TeX, default=label_parsed))) +
      scale_fill_manual(values = pal, labels = c('','','',''), 
                        name = "Probabilistic Bias Intervals") +
      theme_bw() +
     theme_c() +
      labs(y = "Value (Standardized by County by County)",
           title = "",
           x= "") +
      scale_x_date(date_labels = "%b %Y") +
    scale_color_manual(
      name = '', 
      values = c(
        'Covidestim Infection Estimates' = 'darkblue',
        'Wastewater Effective Concentrations' = '#DB4048')) +
    guides(color = guide_legend(override.aes = list(size = 12,
                                                    linewidth=3)),
           fill = guide_legend(override.aes =list(size = 6)))

Normalization Strategy 3: By Maximum (Separately)

For each county, normalize the wastewater concentration by the maximum concentration for that county and the Probabilistic Bias Infections by the maximum median estimated counts for that county (2.5th percentile and 97.5th percentile also standardized by the maximum median estimated counts for that county).

PB Counts and Wastewater

# correlations between PB counts and wastewater concentrations

correlations <- w_data %>%
  group_by(version) %>%
  summarize(Correlation = cor(exp_cases_median_std,mean_conc_std),
            vlabel =unique(vlabel)) %>%
  mutate(x=1.8,
         y=.1)

w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Mean Wastewater Concentration\n(Standardized by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Mean Wastewater Concentration")



plt_wastewater_pb <- w_data %>%
  group_by(version) %>%
  ggplot(aes(x=exp_cases_median_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = mean_conc_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= correlations ) +
  facet_wrap(~vlabel,
             ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Mean Wastewater Concentration\n(Standardized by County)",
       title = "")

PB Counts and Covidestim

# correlations between PB counts and Covidestim estimates


covidestim_correlations <- w_data %>%
  filter(!is.na(infections_std)) %>%
  group_by(vlabel) %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(exp_cases_median_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1.8,
         y=.1)



w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
             ncol=2,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Covidestim Estimates\n(Standardized by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")


plt_covidestim_pb <- w_data %>%
  ggplot(aes(x=exp_cases_median_std, y = infections_std)) +
  geom_point(alpha=.5) +
  geom_linerange(aes(xmin = exp_cases_lb_std,
                     xmax=exp_cases_ub_std, 
                     y = infections_std),
                 alpha = .3) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  facet_wrap(~vlabel,
              ncol=1,
             labeller= as_labeller(TeX, 
                                   default = label_parsed)) +
  theme_c() +
  labs(x = "Probabilistic Bias Infections\n(Standardized by County)",
       y = "Covidestim Estimates\n(Standardized by County)",
       title = "")

Combined Figure

cowplot::plot_grid(plt_wastewater_pb, plt_covidestim_pb, ncol=2)

Covidestim and Wastewater

covidestim_correlations <- w_data %>%
  filter(version=="v7") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(mean_conc_std,infections_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1) 



w_data %>%
  filter(version=="v7") %>%
  ggplot(aes(x=infections_std, y = mean_conc_std)) +
  geom_point(alpha=.5) +
  geom_text(
           aes(x=x,y=y,  label=paste0("Correlation: ", 
                                      round(Correlation,3))), 
                data= covidestim_correlations ) +
  theme_c() +
  labs(x = "Covidestim Estimates\n(Standardized by County)",
       y = "Wastewater Concentrations\n(Standardized by County)",
       title = "Correlations Between Probabilistic Bias Infections\nand Covidestim Estimates")




pb_correlations <- w_data %>%
  filter(version=="v7") %>%
  # missing values are handled by casewise deletion
  summarize(Correlation = cor(positive_std,mean_conc_std,
                              use = "complete.obs")) %>%
  mutate(x=1,
         y=.1) 

Simulation Intervals Over Time

county %>%
  filter(version=="v7")  %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub),
             alpha = .5,
             show.legend=FALSE,
             fill="#596281") +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_wrap(~name, scales="free_y", ncol=3) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
    axis.text.x=element_text(size=7),
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "Counties in Massachusetts") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here('figures/simulation-intervals-county.pdf'))
county %>%
  filter(fips %in% sample(unique(.data$fips),2))  %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub),
             alpha = .5,
             show.legend=FALSE,
             fill="#596281") +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "Counties in Massachusetts") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Ratio of Estimated Infections to Observed

county %>%
  filter(version=="v7")  %>%
  #filter(grepl("Nan",name)) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb/positive,
             ymax = exp_cases_ub/positive),
             alpha = .5,
             show.legend=FALSE,
             fill="#596281") +
   # geom_linerange(aes(xmin = mindate,
   #                    xmax=maxdate,
   #                    y = positive,
   #                    color = 'obs')) +
  facet_wrap(~name,ncol=3) +
 # facet_grid(name~vlabel, 
 #                 labeller=labeller(vlabel =as_labeller(
 #                   TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top") +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Ratio of Estimated to Observed Over Time",
       subtitle = "Counties in Massachusetts") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Concordance with Covidestim

wjoined <- wjoined %>%
  mutate(name = gsub("County, MA","", name))

ma_concordance <- wjoined %>%
  filter(biweek >=6) %>%
  # covidestim does not report estimates for Nantucket 
  filter(!grepl("Nantucket", name )) %>%
  select(-date) %>%
  distinct() %>%
  select(exp_cases_lb, exp_cases_ub,
         name, biweek, vlabel, infections) %>%
  mutate(in_interval = case_when(
    exp_cases_lb <= infections & 
      infections <= exp_cases_ub ~ "In Interval",
    exp_cases_lb  > infections  ~ "Below Interval",
    exp_cases_ub < infections ~ "Above Interval"
  ) ) %>%
  group_by(in_interval, name, vlabel) %>%
  summarize(n_per_county=n()) %>%
  group_by(vlabel, in_interval) %>%
  summarize(n_per_version = sum(n_per_county)) %>%
  group_by(vlabel) %>%
  mutate(total = sum(n_per_version)) %>%
  ungroup() %>%
  mutate(prop=n_per_version/total)



ma_concordance_by_county <- wjoined %>%
  filter(biweek >=6) %>%
  # covidestim does not report estimates for Nantucket 
  filter(!grepl("Nantucket", name )) %>%
  select(-date) %>%
  mutate(name = gsub("$", "", 
                            name, fixed=TRUE)) %>%
  select(exp_cases_lb, exp_cases_ub, 
         name, biweek, vlabel, infections) %>%
   distinct() %>%
  mutate(in_interval = case_when(
    exp_cases_lb <= infections & 
      infections <= exp_cases_ub ~ "In Interval",
    exp_cases_lb  > infections  ~ "Below Interval",
    exp_cases_ub < infections ~ "Above Interval"
  ) ) %>%
  group_by(in_interval, name, vlabel) %>%
  summarize(n_per_county=n()) %>%
  group_by(name,vlabel) %>%
  mutate(total = sum(n_per_county),
         prop = n_per_county/total) %>%
  mutate(state="MA") %>%
  ungroup()

Concordance by County

##############################################
# stacked bar chart of concordance by county
##############################################
ma_concordance_by_county %>%
  filter(vlabel=="$P(S_1|untested)$ Centered at Survey Value") %>%
  group_by(name) %>%
  mutate(m = prop[which(in_interval=="In Interval")]) %>%
  ungroup() %>%
  mutate(in_interval = factor(in_interval, levels = c("Above Interval",
                            "In Interval", "Below Interval"))) %>%
  ggplot(aes(x= fct_reorder(name,m), 
             fill = (in_interval), y =prop)) + 
  geom_bar(stat="identity",position="stack") +
  theme_c() +
  coord_flip()+
  scale_fill_manual(values=c("In Interval" = "#79D2AF",
                             "Above Interval" = "#D2AF79",
                      "Below Interval"="#7997D2")) +
  labs(x="", 
       y= "Proportion",
       fill = "Covidestim Median:",
       title = "Massachusetts: Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
       subtitle = TeX("For Implementation with $P(S_1|untested)$ Centered at Survey Value")) +
  theme_c() +
  theme(legend.position="right",
          legend.title = element_text(face="bold", size = 15),
          legend.text= element_text( size = 15),
          legend.spacing.y = unit(3, 'pt'),
          axis.text.y = element_text(size = 17, hjust=1))

Concordance by Version

# 
in_interval <- ma_concordance %>%
  filter(in_interval == "In Interval") %>%
  mutate(n_interval = n_per_version) %>%
  select(vlabel, n_interval)

labels <- ma_concordance %>%
  filter(in_interval=="In Interval") %>%
  arrange((prop)) %>%
  pull(vlabel) %>%
  unique() %>%
  as.character()


#########################################################
# CONCORDANCE BY VERSION 
#########################################################
ma_concordance %>%
  left_join(in_interval) %>%
  mutate(in_interval=factor(in_interval, 
                            levels=c("Above Interval",
                                      "In Interval",
                                     "Below Interval"))) %>%
  ggplot(aes(y =fct_reorder(vlabel,n_interval),
             x = prop, 
             fill=in_interval)) +
  #facet_wrap(~fct_reorder((vlabel),m), ncol=1) +
  geom_bar(stat="identity",position="stack", color="darkgray", linewidth=.2) +
   theme_c(axis.text.y = element_text(size = 14, hjust=1),
          axis.title.x = element_text(size=14),
          axis.text.x=element_text(size=10),
          legend.position="right",
          legend.title = element_text(face="bold", size = 15),
          legend.text= element_text( size = 15),
          legend.spacing.y = unit(3, 'pt')) +
  scale_y_discrete(labels=unname(TeX(labels))) +
  labs(y ="",
       x="Proportion Containing\nthe Covidestim Median",
       fill = "Covidestim Median:",
       title = "Where Covidestim Medians Fall\nRelative to Probabilistic Bias Intervals",
       subtitle = "By Implementation, in Massachusetts") +
  scale_fill_manual(values=c("Below Interval"="#7997D2",
                             "In Interval" = "#79D2AF",
                             "Above Interval" = "#D2AF79"),
                     breaks = c("Below Interval",
                               "In Interval",
                               "Above Interval")) +
  guides(fill=guide_legend(byrow=TRUE)) +
  scale_x_continuous(n.breaks = 7,
                     expand=c(0,0),
                     limits=c(-.05,1))

ggsave(here('figures/concordance-covidestim-county.pdf'))
ma_concordance %>%
  group_by(vlabel) %>%
  mutate(total=sum(n_per_version)) %>%
  filter(in_interval == "In Interval") %>%
  mutate(n_interval = n_per_version) %>%
  ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
  geom_text(aes(label=round(prop,3), x= prop+.03), 
             angle=0) +
  geom_bar(stat="identity", fill="#596281") +
  scale_y_discrete(breaks= unique(ma_concordance$vlabel),
                   labels=function(x)TeX(x)) +
  scale_x_continuous(expand=c(0,0), 
                     limits=c(0,.9), n.breaks=7)  +
  theme_c(axis.text.y = element_text(size = 13, hjust=1),
          axis.title.x = element_text(size=14),
          axis.text.x=element_text(size=10)) +
  labs(y="",
       x="Proportion Containing the Covidestim Median",
       title="Proportion Containing the Covidestim Median",
       subtitle = "By Implementation")

Faceted by Version

county %>% 
  mutate(vlabel=fct_reorder(vlabel,as.numeric(vlabel),.desc=TRUE)) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub),
             alpha = .8,
             show.legend=FALSE,
             fill="#596281") +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "Counties in Massachusetts") +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here('figures/simulation-intervals-faceted-version-county.pdf'))
county$vlabel %>% unique()
## [1] $P(S_1|untested)$ Centered at Survey Value              
## [2] $\\beta$ Centered at Survey Value                       
## [3] $P(S_1|untested)$ and $\\beta$ Centered at Survey Values
## [4] $Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$    
## 4 Levels: $Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$ ...
pal
##               $P(S_1|untested)$ Centered at Survey Value 
##                                                "#10BAC5" 
## $P(S_1|untested)$ and $\\beta$ Centered at Survey Values 
##                                                "#1B10C5" 
##     $Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$ 
##                                                "#EFB719" 
##                        $\\beta$ Centered at Survey Value 
##                                                "#900C3F"
county %>% 
    mutate(vlabel=fct_reorder(vlabel,as.numeric(vlabel),.desc=TRUE)) %>%
  filter(biweek>=6) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .5) +
  facet_wrap(~name,
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          legend.text=element_text(hjust=0)
          ) +
  scale_fill_manual(values = pal,
                    labels=TeX(unique(as.character(county$vlabel))))  +
  labs(x="",
       fill="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "Counties in Massachusetts") +
  guides(fill=guide_legend(nrow=2))

ggsave(here('figures/simulation-intervals-color-version-county.pdf'))